package com.uusee.crawler.pageprocessor.baike.imdb;

import java.util.Date;


import com.uusee.crawler.util.PageProcessorUtils;
import com.uusee.shipshape.bk.model.Baike;
import com.uusee.util.StringUtils;

/**
 * Pulls the title, release year and plot summary out of the HTML of an
 * imdb.cn movie plots page and stores them on a {@link Baike} entry.
 */
public class ImdbMoviePlotsPageProcessor {
	/** Matches the page header and captures the text of the title link (the movie name). */
	private static final String NAME_REG="<div id=\"imdbleftsec\"><div id=imdbleft2><span class=mn><a href=\"http://www.imdb.cn/title/tt\\d+\">([^<]*?)</a></span>";
	/**
	 * Matches the page header and captures the digits of the year link that follows the title.
	 * The parentheses around the year link are matched as optional literal characters so that
	 * the year digits are the only (first) capturing group, in line with the other patterns.
	 * NOTE(review): assumes PageProcessorUtils.getValue returns the first capturing group - confirm.
	 */
	private static final String YEAR_REG="<div id=\"imdbleftsec\"><div id=imdbleft2><span class=mn><a href=\"http://www.imdb.cn/title/tt\\d+\">[^<]*?</a></span> \\(?<a href=\"http://www.imdb.cn/Sections/Years/\\d+\" class=\"blue\">(\\d+)</a>\\)?";
	/** Captures the content of the plot body container. */
	private static final String SUMMARY_REG="<td ><div id=imdbjqbody>(.*?)</div></td>";
	
	/**
	 * Extracts the plot summary from the page and, only when one is found, writes the
	 * name, year and summary to {@code baike} and sets its update date to the current time.
	 * When no summary is found {@code baike} is left untouched.
	 *
	 * Any exception raised during extraction or while updating {@code baike} is caught and
	 * its stack trace printed; nothing is propagated to the caller.
	 *
	 * @param plotsPage HTML source of the plots page
	 * @param baike     entry that receives the extracted values
	 */
	public void innerProcess(String plotsPage,Baike baike) {
		try {
			String summary = PageProcessorUtils.getValue(SUMMARY_REG, plotsPage);
			if (StringUtils.isNotEmpty(summary)) {
				// Name and year are only looked up once a summary is known to exist.
				String name = PageProcessorUtils.getValue(NAME_REG, plotsPage);
				String year = PageProcessorUtils.getValue(YEAR_REG, plotsPage);
				baike.setName(name);
				baike.setYear(year);
				baike.setSynopsis(summary);
				baike.setUpdateDate(new Date());
			}
		} catch ( Exception e ) {
			// Best effort: a failure on one page is reported but must not reach the caller.
			e.printStackTrace();
		}
	}
}
